*! version 5.0
* 13 August 2018
* NIDS
* Master Income do file for Nids Wave 1

* THIS IS 3rd INCOME DO FILE - PREPARING VARIABLES FOR IMPUTATION: 3 OF 7
* THIS DO FILE PREPARES THE RELEVANT INCOME VARIABLES FOR IMPUTATION

*=====================================================================================================================================
* GLOBALS FOR DATA FILES, DO FILES AND VERSION SUFFIXES

* DEFINED IN "W1 Income do file (1 of 7)" 


*=====================================================================================================================================

* OPENING THE DATASET CREATED IN THE DO FILE "Income - Merging datasets to create income variables (2 of 7)"

* Load the person-level dataset as the working data (any data in memory is discarded).
use "$DataOUT\data.dta", clear

* Attach the household agriculture file; many person rows share one household row (m:1 on w1_hhid).
merge m:1 w1_hhid using "$DataOUT\hhagric.dta" 
* The merge result indicator is not used afterwards.
drop _merge
* NOTE(review): the merged-in file is deleted from disk here, so this do-file cannot be
* re-run on its own until hhagric.dta has been recreated - presumably by an earlier step; confirm.
erase "$DataOUT\hhagric.dta"
*-------------------------------------------------------------------------------------------------------------------------------------

* RESPONSE STATUS

* Response flag (proxies included): 1 when w1_a_refexpl is system-missing, 0 otherwise
generate response = (w1_a_refexpl == .)
* NOTE(review): this replace only touches rows where response is already 0 - confirm the intended condition
replace response = 0 if w1_a_refexpl != . & w1_a_gen != .
label variable response "Variable to indicate if the person responded or not including proxies"
label values response dummy

* Outcome of the attempted interview: the response flag, with proxy interviews coded as 2
label define responseoutcome 0 "Non-response" 1 "Adult" 2 "Proxy"
generate responseoutcome = cond(proxy == 1, 2, response)
label values responseoutcome responseoutcome


* DEMOGRAPHICS

* Interview month: taken from the adult questionnaire, otherwise from the proxy questionnaire
generate intmonth = cond(w1_a_intrv_m == ., w1_p_intrv_m, w1_a_intrv_m)

* Gender
********
* male is 1 for code 1 and 0 for code 2; male_d marks rows where male is not system-missing
recode w1_best_gen (1=1 "Male") (2=0 "Female"), gen(male)
generate male_d = (male != .)
label variable male "Dummy variable for male"

* Race (best) variable
********
rename w1_best_race race
* Record whether race was observed before the gap is filled below
generate race_d = (race != .)
* Missing race is set to code 1 (African); race_d is kept so this can be controlled for
replace race = 1 if race == .
* One 0/1 indicator per race code (1-5)
recode race (1=1 "African") (2/5=0 "Non-African"), gen(african)
recode race (2=1 "Coloured") (1 3/5=0 "Non-Coloured"), gen(coloured)
recode race (3=1 "Asian_Indian") (1/2 4/5=0 "Non-asian_indian"), gen(asian_indian)
recode race (4=1 "White") (1/3 5=0 "Non-White"), gen(white)
recode race (5=1 "Other") (1/4=0 "Non-Other"), gen(other)

* Age
********
rename w1_best_age_yrs age
* age_d = 1 marks rows whose age was missing or negative and is therefore set to 0 below.
* The flag is taken BEFORE the overwrite: testing age==0 afterwards would also flag
* genuine infants aged 0 as having no age data.
* Note the polarity is kept as in the original (1 = age NOT observed), unlike the other *_d flags.
gen age_d=(age==. | age<0)
replace age=0 if age==. | age<0
gen agesq=age^2
label variable agesq "Age squared at interview"
gen agecu=age^3
label variable agecu "Age cubed at interview"

* Retirement-aged individuals: 1 for men aged 65 or more and for women aged 60 or more,
* 0 for everyone else (including rows where male is missing)
generate retirement = (age >= 65 & male == 1) | (age >= 60 & male == 0)

* Schooling
************
* Schooling grade from the best-education code:
*   codes -9 to 0 become 0, codes 1-12 are kept as grades, codes 13-24 are capped at 12,
*   code 25 becomes 0 (no schooling), codes 26 and above become missing
recode w1_best_edu ///
    (-9/0=0) ///
    (1=1 "Grade 1") (2=2 "Grade 2") (3=3 "Grade 3") (4=4 "Grade 4") ///
    (5=5 "Grade 5") (6=6 "Grade 6") (7=7 "Grade 7") (8=8 "Grade 8") ///
    (9=9 "Grade 9") (10=10 "Grade 10") (11=11 "Grade 11") (12=12 "Grade 12") ///
    (13/15=12) ///
    (25=0 "No schooling") ///
    (16/24=12) ///
    (26/max=.), gen(schooling)
label variable schooling "Schooling"
* Presence flag and squared term
generate schooling_d = (schooling != .)
generate schoolingsq = schooling^2

* Education dummies
* Keep only the post-school codes: codes -9 to 15 and code 25 become 0, all other codes are left unchanged
recode w1_best_edu  (-9/0=0) (0/12=0) (12/15=0) (25=0), gen(postschool)
* NOTE(review): negative codes were already recoded to 0 on the line above, so this replace looks like a no-op - confirm
replace postschool=0 if w1_best_edu<0 & w1_best_edu>=-9 & schooling<12
* 1 when postschool is not system-missing
gen postschool_d=postschool!=.
* One indicator (edu1, edu2, ...) per distinct value of postschool, in ascending order of value
tab postschool, gen(edu)
* NOTE(review): the renames below are positional - they assume the tabulation produced at least
* ten categories and that the 2nd to 10th map onto these qualifications; verify against the data
rename edu2 cert_nomat
rename edu3 dip_nomat
rename edu4 cert_mat
rename edu5 dip_mat
rename edu6 bachelors
rename edu7 bach_dip
rename edu8 honours
rename edu9 postgrad
rename edu10 othered

* Years of post-school education implied by each postschool code (codes not listed are left unchanged)
recode postschool (16/17 18/19=1) (20=3) (21 22=4) (23=6) (24=0), gen(postschoolyears)

* Work experience proxy: age less 6, less schooling, less post-school years
generate experience = age - 6 - schooling - postschoolyears

* Trade union membership
* Anything other than an explicit code 1 (including missing) counts as not a member
generate tradeunion = (w1_a_em1tru == 1)
label variable tradeunion "Trade union membership"

*------------------------------------------------------------------------------------------------------------------------------------

* BIOCHILDREN VARIABLE (NUMBER OF BIOLOGICAL CHILDREN UNDER 15 STILL IN HOUSEHOLD)

* Count, per respondent, the birth-history children born in 1993 or later that are recorded as
* still living with the respondent (w1_a_bhlive# == 1)

generate biochildren = 0
label variable biochildren "Number of biological children 15 or younger still living with this mother"

forvalues child = 1/16 {
    * Birth year may be stored as text; convert when possible
    capture destring w1_a_bhdob_y`child', replace
    generate biobirth`child'year = w1_a_bhdob_y`child'
    * Years after 2008 or before 1900 are treated as unusable
    replace biobirth`child'year = . if (biobirth`child'year > 2008 & biobirth`child'year != .) | biobirth`child'year < 1900
    replace biochildren = biochildren + 1 if biobirth`child'year >= 1993 & biobirth`child'year != . & w1_a_bhlive`child' == 1
}

* No count is kept for men with a zero count, nor for proxy interviews
replace biochildren = . if (biochildren == 0 & male == 1) | proxy == 1

* Household total of the individual counts
egen hh_biochildren = total(biochildren), by(w1_hhid)

*-----------------------------------------------------------------------------------------------------------------------------------

* "OTHER" INCOME
* Receipt flag: code 1 stays 1, code 2 becomes 0 (other codes are left unchanged)
recode w1_a_inco (2=0) (1=1), gen(othe_rec)
* Amount: codes -9 to -3 are blanked
recode w1_a_inco_v (-9/-3 = .), gen(othe)

*-----------------------------------------------------------------------------------------------------------------------------------

* MAIN WAGE VARIABLES

* Employment variable
* 1 when either the adult (w1_a_em1) or the proxy (w1_p_emactcur_u) item equals 1;
* 0 for all other responders; missing for non-responders
generate working = 1 if w1_a_em1 == 1 | w1_p_emactcur_u == 1
replace working = 0 if working != 1 & response == 1

* 2nd job?
* Reported gross and net amounts; codes -9 to 0 are treated as missing
recode w1_a_em2inc (-9/0=.), gen(fwag_g2)
label variable fwag_g2 "Gross pay from 2nd Job"
recode w1_a_em2pay (-9/0=.), gen(fwag_p2)
label variable fwag_p2 "Net pay from 2nd Job"
* Bracket answers are mapped to one rand value per bracket
recode w1_a_em2inc_sh (-9/0=.) (1=0) (2=100) (3=350) (4=750) (5=1250) (6=2000) (7=3000) (8=4000) (9=5250) (10=7000) (11=9500) (12=13500) (13=23000) (14=40000) (15=75000) (16/max=.), gen(fwag_p_ib2)
* Net pay from the second job: the reported amount is kept when present and the bracket value
* only fills gaps. This matches how fwag, cwag and swag are built; previously a non-missing
* bracket value replaced a reported amount.
gen fwag2=fwag_p2
replace fwag2=fwag_p_ib2 if fwag_p_ib2!=. & fwag2==.
label variable fwag2 "Net pay from 2nd Job"
* Second-job indicator: 1 only when the job is reported AND a pay value exists; 0 for other responders
gen working2=1 if w1_a_em2==1 & fwag2!=.
replace working2=0 if working2!=1 & response==1

* Gross wage variable: main job plus second job
recode w1_a_em1inc (-9/0=.), gen(fwag_g)
* rowtotal treats a missing component as zero, so a total of zero means neither job has a value
tempvar gross_total
egen `gross_total' = rowtotal(fwag_g fwag_g2)
replace fwag_g = cond(`gross_total' == 0, ., `gross_total')
drop `gross_total'
label variable fwag_g "Gross pay"

generate lnfwag_g = ln(fwag_g)
label variable lnfwag_g "Log of Gross pay"

generate fwag_g_d = (fwag_g != .)
label variable fwag_g_d "Does person have non-missing gross pay data"
label values fwag_g_d dummy

* Net wage from income brackets and proxy income brackets
* The same bracket-to-rand mapping is applied to the adult and the proxy question
local net_brackets (-9/0=.) (1=.) (2=100) (3=350) (4=750) (5=1250) (6=2000) (7=3000) (8=4000) (9=5250) (10=7000) (11=9500) (12=13500) (13=23000) (14=40000) (15=75000) (16/max=.)
* NOTE(review): w1_a_em1inc_s lacks the _sh suffix used by the other bracket items; if the full
* name is w1_a_em1inc_sh this relies on variable-name abbreviation - confirm
recode w1_a_em1inc_s `net_brackets', gen(fwag_ib)
recode w1_p_em1inc_sh `net_brackets', gen(fwag_pib)

* The proxy bracket only counts when the proxy activity code is 1
replace fwag_pib = . if w1_p_emactcur_u != 1
generate fwag_pib_d = (fwag_pib != .)
label variable fwag_pib_d "Dummy to indicate if net wages were sourced from PROXY income brackets"
label values fwag_pib_d dummy

* For proxy interviews, fill an empty adult bracket with the proxy bracket
replace fwag_ib = fwag_pib if proxy == 1 & fwag_pib != . & fwag_ib == .
generate fwag_ib_d = (fwag_ib != .)
label variable fwag_ib_d "Dummy to indicate if net wages were sourced from income brackets"
label values fwag_ib_d dummy

* Net wages from the reported amount only (no brackets); codes -9 to 0 are blanked
recode w1_a_em1pay (-9/0=.), gen(fwag_p)
label variable fwag_p "Take home pay"

generate lnfwag_p = ln(fwag_p)
label variable lnfwag_p "Log of Take home pay"

generate fwag_p_d = (fwag_p != .)
label variable fwag_p_d "Does person have non-missing take-home pay data"
label values fwag_p_d dummy

* Net wages: reported amount, else the (proxy) bracket value, plus second-job net pay
generate fwag = cond(fwag_p == . & fwag_ib != ., fwag_ib, fwag_p)
label variable fwag "Monthly take home pay from main job including BRACKETS"

* rowtotal treats a missing component as zero, so a total of zero means no pay value at all
tempvar net_total
egen `net_total' = rowtotal(fwag fwag2)
replace fwag = cond(`net_total' == 0, ., `net_total')
drop `net_total'

generate lnfwag = ln(fwag)
label variable lnfwag "Log of Take home pay including BRACKETS"

generate fwag_d = (fwag != .)
label variable fwag_d "Does person have non-missing take-home pay data USING BRACKETS"
label values fwag_d dummy

*-----------------------------------------------------------------------------------------------------------------------------------

* NUMBER OF HOURS WORKED PER WEEK/MONTH

* Weekly hours: main job plus second job, codes -9 to 0 blanked
recode w1_a_em1hrs (-9/0=.), gen(weeklyhours)
recode w1_a_em2hrs (-9/0=.), gen(weeklyhours2)
* weeklyhours_temp is left in the dataset, as before
egen weeklyhours_temp = rowtotal(weeklyhours weeklyhours2)
* A total of zero (no data) or above 126 hours a week is treated as missing
replace weeklyhours = cond(weeklyhours_temp > 126 | weeklyhours_temp == 0, ., weeklyhours_temp)

* Monthly hours = weekly hours x 22/5
generate monthlyhours = weeklyhours * (22/5)

* Keep the median in a scalar straight away so later commands cannot disturb it
quietly summarize monthlyhours, detail
return list
tempname median_hours
scalar `median_hours' = r(p50)

* Workers without hours data are assumed to work the median hours;
* imputemonthlyhours marks them with 1 and all other workers with 0
* (original note: this changes ALL of the proxies and 3 of the adult sample)
generate imputemonthlyhours = (monthlyhours == .) if working == 1
replace monthlyhours = `median_hours' if monthlyhours == . & working == 1

*-----------------------------------------------------------------------------------------------------------------------------------

* BONUS INCOME

* 13th cheque, profit share and other bonus are built identically:
*   X_pa_rec   received flag: missing for non-responders, forced to 1 when an amount exists
*   X_pa       reported amount with codes -9 to 0 blanked
*   lnX_pa     log of that amount
*   X_pa_d     1 when an amount is present
*   X          the amount divided by 12
local items     cheq    prof   bonu
local questions em1cheq em1prf em1bon
forvalues i = 1/3 {
    local item : word `i' of `items'
    local qn   : word `i' of `questions'
    generate `item'_pa_rec = (w1_a_`qn' == 1) if response != 0
    recode w1_a_`qn'_a (-9/0=.), gen(`item'_pa)
    replace `item'_pa_rec = 1 if `item'_pa != .
    generate ln`item'_pa = ln(`item'_pa)
    generate `item'_pa_d = (`item'_pa != .)
    label values `item'_pa_d dummy
    generate `item' = `item'_pa / 12
}
label variable cheq_pa_d "Does person have non-missing 13th cheque data"
label variable prof_pa_d "Does person have non-missing profit share data"
label variable prof "Monthly income from prof_pa from main job(i.e. 1/12 of prof_pa)"
label variable bonu_pa_d "Does person have non-missing other bonus data"
label variable bonu "Monthly income from other bonuses from main job(i.e. 1/12 of other bonuses)"

* Extra (piece-rate) payments: same construction, but the amount is not divided by 12
generate extr_rec = (w1_a_em1pcrt == 1) if response != 0
recode w1_a_em1pcrt_a (-9/0=.), gen(extr)
label variable extr "Monthly income from extra payment on a piece rate basis"
replace extr_rec = 1 if extr != .
generate lnextr = ln(extr)
generate extr_d = (extr != .)
label variable extr_d "Does person have non-missing extra payment data"
label values extr_d dummy

*-----------------------------------------------------------------------------------------------------------------------------------

* CASUAL LABOUR VARIABLES

* Casual work indicator: 1 when w1_a_emc is 1, 0 otherwise, missing for non-responders
generate cworking = (w1_a_emc == 1) if response != 0

* Reported pay from casual work; codes -9 to 0 are blanked
recode w1_a_emcinc (-9/0=.), gen(cwag_p)
label variable cwag_p "Pay from casual Job"

generate cwag_p_d = (cwag_p != .)
label variable cwag_p_d "Does person have non-missing casual pay data"
label values cwag_p_d dummy

* Anyone with a casual pay value is treated as doing casual work
replace cworking = 1 if cwag_p != .
generate lncwag_p = ln(cwag_p)

* Wages from casual work brackets - only 22 people
* Bracket answers are mapped to one rand value per bracket
recode w1_a_emcinc_sh (-9/0=.) (1=.) (2=100) (3=350) (4=750) (5=1250) (6=2000) (7=3000) (8=4000) (9=5250) (10=7000) (11=9500) (12=13500) (13=23000) (14=40000) (15=75000) (16/max=.), gen(cwag_p_ib)

* Wages from casual work including brackets
* cwag = reported amount, with the bracket value filling gaps
gen cwag=cwag_p
label variable cwag "Monthly take home pay from casual work including BRACKETS"
replace cwag=cwag_p_ib if cwag==. & cwag_p_ib!=.
* cwag_p is not relabelled here: it still holds only the reported amounts, so its earlier
* label stays correct. (It used to be relabelled "...including brackets", which describes cwag.)
gen cwag_d=cwag!=.
label variable cwag_d "Does person have non-missing casual pay data including brackets"
label values cwag_d dummy
* Anyone with a casual pay value (reported or bracket) is treated as doing casual work
replace cworking=1 if cwag!=.
gen lncwag=ln(cwag)

* HOURLY casual wages variables
* Monthly hours of casual work; codes -9 to 0 and values above 550 are treated as missing
recode w1_a_emchrs (-9/0=.), gen(cmonthlyhours)
replace cmonthlyhours=. if cmonthlyhours>550 & cmonthlyhours!=.
* The median (r(p50)) from this summarize is used for the imputation below
quietly sum cmonthlyhours, detail
return list
* Assuming that all of those without hours data work the median of hours
* cimputemonthlyhours: 1 = hours were imputed, 0 = hours were reported (casual workers only).
* The second line used to assign 1 as well, which flagged every casual worker as imputed.
gen cimputemonthlyhours=1 if cmonthlyhours==. & cworking==1
replace cimputemonthlyhours=0 if cimputemonthlyhours!=1 & cworking==1
replace cmonthlyhours=r(p50) if cmonthlyhours==. & cworking==1

*-----------------------------------------------------------------------------------------------------------------------------------

* SELF-EMPLOYMENT

* Self-employed: 1 when the adult item (w1_a_ems) is 1 or the proxy activity code is 2,
* 0 for all other responders, missing for non-responders
generate sworking = (w1_a_ems == 1 | w1_p_emactcur_u == 2) if response != 0

* Reported earnings from self-employment; codes -9 to -3 are blanked
recode w1_a_emsincmn (-9/-3=.), gen(swag_p)
label variable swag_p "Earnings from self-employment"

generate swag_p_d = (swag_p != .)
label variable swag_p_d "Does person have non-missing self-employment data"
label values swag_p_d dummy

generate lnswag_p = ln(swag_p)

* Bracket answers (adult and proxy) mapped to one rand value per bracket
local self_brackets (-9/0=.) (1=0) (2=100) (3=350) (4=750) (5=1250) (6=2000) (7=3000) (8=4000) (9=5250) (10=7000) (11=9500) (12=13500) (13=23000) (14=40000) (15=75000) (16/max=.)
recode w1_a_emsinc_sh `self_brackets', gen(swag_p_ib)
recode w1_p_em1inc_sh `self_brackets', gen(swag_p_pib)

* The proxy bracket only counts when the proxy activity code is 2, and only fills an empty adult bracket
replace swag_p_pib = . if w1_p_emactcur_u != 2
replace swag_p_ib = swag_p_pib if swag_p_pib != . & swag_p_ib == .
* Wages from self-employment work including brackets
* swag = reported amount, with the bracket value filling gaps
gen swag=swag_p
label variable swag "Monthly income from self-employment including BRACKETS"
replace swag=swag_p_ib if swag==. & swag_p_ib!=.
* swag_p is not relabelled here: it still holds only the reported amounts, so its earlier
* label stays correct. (It used to be relabelled "...including brackets", which describes swag.)
gen swag_d=swag!=.
label variable swag_d "Does person have non-missing self employment earnings data including brackets"
label values swag_d dummy
gen lnswag=ln(swag)

***Create HOURLY self-employment wages variables
* Weekly hours (codes -9 to 0 blanked) converted to monthly hours with the factor 22/5
recode w1_a_emshrs (-9/0=.), gen(sweeklyhours)
gen smonthlyhours=sweeklyhours*(22/5)
*replace smonthlyhours=. if smonthlyhours>550 & smonthlyhours!=. /*not necessary*/
* The median (r(p50)) from this summarize is used for the imputation below
quietly sum smonthlyhours, detail
return list
*This makes the assumption that all of those without hours data work the median of hours 
* simputemonthlyhours: 1 = hours were imputed, 0 = hours were reported (self-employed only).
* The second line used to assign 1 as well, which flagged every self-employed person as imputed.
gen simputemonthlyhours=1 if smonthlyhours==. & sworking==1
replace simputemonthlyhours=0 if simputemonthlyhours!=1 & sworking==1
replace smonthlyhours=r(p50) if smonthlyhours==. & sworking==1

*-----------------------------------------------------------------------------------------------------------------------------------

* INCOME FROM HELPING IN A FRIEND'S BUSINESS

* Helped a friend with their business: 1 when w1_a_emhearn is 1, 0 for other responders
* (missing answers are assumed to mean no), missing for non-responders
generate help_rec = cond(w1_a_emhearn == 1, 1, cond(response == 1, 0, .))

* Earnings from helping; codes -9 to -3 and the value 333333 are blanked
recode w1_a_emhearn_v (-9/-3=.) (333333=.), gen(help)
label variable help "Monthly income from helping friend's with their business"

* A zero amount counts as not receiving (possibly sporadic cash with nothing in the last month):
* clear the flag, blank the zero, then switch the flag on for every remaining amount
replace help_rec = 0 if help == 0
replace help = . if help == 0
replace help_rec = 1 if help != .

generate help_d = (help != .)
label variable help_d "Does person have non-missing help-friend data"
label values help_d dummy

generate lnhelp = ln(help)

*hours of work helping friend
* Weekly hours (codes -9 to -3 and values of 140 or more blanked) converted to monthly with 22/5
recode w1_a_emhhrs (-9/-3=.) (140/max=.), gen(hf_weeklyhours)
gen hf_monthlyhours=hf_weeklyhours*(22/5)
* The median (r(p50)) from this summarize is used for the imputation below
quietly sum hf_monthlyhours, detail
return list
*This makes the assumption that all of those without hours data work the median of hours 
* imp_hf_monthlyhours: 1 = hours were imputed, 0 = hours were reported (helpers only).
* The second line used to assign 1 as well, which flagged every helper as imputed.
gen imp_hf_monthlyhours=1 if hf_monthlyhours==. & help_rec==1
replace imp_hf_monthlyhours=0 if imp_hf_monthlyhours!=1 & help_rec==1
replace hf_monthlyhours=r(p50) if hf_monthlyhours==. & help_rec==1

*hourly earnings from helping friend
gen hr_help=help/hf_monthlyhours
gen lnhr_help=ln(hr_help)


*-----------------------------------------------------------------------------------------------------------------------------------

* STATE OLD AGE PENSION

*There are 181 people receiving pensions that are not above retirement age
*Levels 870 before April, 940 before october but after, 960 
*also it appears that some private pensions are mixed in... 
*have reallocated pensions above a 980 threshold (slight tolerance) to private pensions

* Receives the state old age pension: 1 when w1_a_incgovpen is 1, 0 for other responders
* (missing answers are assumed to mean no), missing for non-responders
generate spen_rec = cond(w1_a_incgovpen == 1, 1, cond(response == 1, 0, .))

* Pension amount; codes -9 to -3 are blanked
recode w1_a_incgovpen_v (-9/-3=.), gen(spen)
label variable spen "Monthly income from state old age pension"

* A zero amount counts as not receiving: clear the flag, blank the zero,
* then switch the flag on for every remaining amount
replace spen_rec = 0 if spen == 0
replace spen = . if spen == 0
replace spen_rec = 1 if spen != .

generate spen_d = (spen != .)
label variable spen_d "Does person have non-missing state pension data"
label values spen_d dummy

* Amounts above 980 (the grant level plus a small tolerance for reporting error) are taken to be
* other grants or private pensions: they are parked in temp_ppen for the private pension
* section and removed from the state pension variables
replace spen_rec = 0 if spen > 980 & spen != .
generate temp_ppen = spen if spen > 980 & spen != .
replace spen = . if spen > 980 & spen != .

generate lnspen = ln(spen)

*-----------------------------------------------------------------------------------------------------------------------------------

* PRIVATE OR FOREIGN PENSIONS

*There are 105 people receiving private/foreign pensions that are under 60!
*One outlier is set to missing - R150000 per month given the rest of their characteristics seems impossible

* Receives a private/foreign pension or retirement annuity, or had an amount moved here from
* the state pension question (temp_ppen); 0 for other responders, missing for non-responders
generate ppen_rec = cond(w1_a_incppen == 1 | w1_a_incret == 1 | temp_ppen != ., 1, cond(response == 1, 0, .))

* Income from private retirement funding
* Each component is zero-filled (codes -9 to 0 and system-missing; the 150000 outlier too)
* so that the three parts can simply be added
recode w1_a_incppen_v (-9/0=0) (150000=0) (.=0), gen(temp1)
recode w1_a_incret_v (-9/0=0) (.=0), gen(temp2)
recode temp_ppen (.=0)
generate ppen = temp1 + temp2 + temp_ppen
label variable ppen "Monthly income from private/foriegn penion pension and retirement annuities"
* A sum of zero means no component had a value
replace ppen = . if ppen == 0
* Removes every variable whose name starts with temp
drop temp*

replace ppen_rec = 1 if ppen != .
generate ppen_d = (ppen != .)
label variable ppen_d "Does person have non-missing private retirement funding income data"
label values ppen_d dummy
generate lnppen = ln(ppen)

*-----------------------------------------------------------------------------------------------------------------------------------

* UNEMPLOYMENT INSURANCE FUND (UIF)

*Many of the people who are earning UIF are working.  Delayed claims? Fraud? Measurement error?
*Treating the data as if it is correct

***UIF RULES
*Max of 58% of daily fwag_p when were employed
*Can claim 1/6 days worked in employment spell when become unemployed
*Can claim for up to 238 (extended to 275 during 2009) days if worked for >4 years
*Can claim for 121 days on maternity leave

*received UIF
* 1 when w1_a_incuif is 1, 0 for other responders, missing for non-responders
gen uif_rec=1 if w1_a_incuif==1
replace uif_rec=0 if uif_rec!=1 & response==1 /*this is assuming that missings are actually zeros*/

*income from UIF
* codes -9 to -3 are blanked
recode w1_a_incuif_v (-9/-3=.), gen(uif)
label variable uif "Monthly income from UIF payments"
replace uif=. if uif==50000 /*I have a hard time believing that this is not an outlier*/
* A zero amount means no receipt: clear the flag, blank the zero, and only then switch the
* flag on for the remaining amounts. (Previously the flag was set from uif!=. while the zeros
* were still present, which turned it back on for zero reports.)
replace uif_rec=0 if uif==0 
replace uif=. if uif==0
replace uif_rec=1 if uif!=.
gen uif_d=uif!=.
label variable uif_d "Does person have non-missing uif income data"
label values uif_d dummy
gen lnuif=ln(uif)

*-----------------------------------------------------------------------------------------------------------------------------------

* WORKMEN'S COMPENSATION

*received workmens compensation
* 1 when w1_a_incwc is 1, 0 for other responders, missing for non-responders
gen comp_rec=1 if w1_a_incwc==1
replace comp_rec=0 if comp_rec!=1 & response==1 /*this is assuming that missings are actually zeros*/

*income from workmens compensation
* codes -9 to -3 and zero amounts are blanked; any remaining amount switches the flag on
recode w1_a_incwc_v (-9/-3=.), gen(comp)
label variable comp "Monthly income from worker compensation payments"
replace comp=. if comp==0
replace comp_rec=1 if comp!=.
gen comp_d=comp!=.
* Label corrected: it previously said "uif income data" (copied from the UIF section)
label variable comp_d "Does person have non-missing workmen's compensation income data"
label values comp_d dummy
gen lncomp=ln(comp)

*-----------------------------------------------------------------------------------------------------------------------------------

* DISABILITY GRANT

*received disability grant
* 1 when w1_a_incdis is 1, 0 for other responders, missing for non-responders
gen dis_rec=1 if w1_a_incdis==1
replace dis_rec=0 if dis_rec!=1 & response==1 /*this is assuming that missings are actually zeros*/

*income from disability grant
* codes -9 to -3 are blanked
recode w1_a_incdis_v (-9/-3=.), gen(dis)
label variable dis "Monthly income from disability grant payments"
* A zero amount means no receipt: clear the flag, blank the zero, and only then switch the
* flag on for the remaining amounts. (Previously the flag was set from dis!=. while the zeros
* were still present, which turned it back on for zero reports.)
replace dis_rec=0 if dis==0 
replace dis=. if dis==0
replace dis_rec=1 if dis!=.
gen dis_d=dis!=.
label variable dis_d "Does person have non-missing disability grant data"
label values dis_d dummy
gen lndis=ln(dis)


*-----------------------------------------------------------------------------------------------------------------------------------

* CHILD SUPPORT GRANT

*This one makes more sense because people can receive grants for multiple children and it is means tested!
*It increased 10 on the 1st April and another 10 on the 1st of October

*received child support grant
* 1 when w1_a_incchld is 1, 0 for other responders, missing for non-responders
gen chld_rec=1 if w1_a_incchld==1
replace chld_rec=0 if chld_rec!=1 & response==1 /*this is assuming that missings are actually zeros*/

*income from child support grant
* codes -9 to -3 are blanked
recode w1_a_incchld_v (-9/-3=.), gen(chld)
label variable chld "Monthly income from child support grant payments"
* A zero amount means no receipt: clear the flag, blank the zero, and only then switch the
* flag on for the remaining amounts. (Previously the flag was set from chld!=. while the zeros
* were still present, which turned it back on for zero reports.)
replace chld_rec=0 if chld==0 
replace chld=. if chld==0
replace chld_rec=1 if chld!=.
gen chld_d=chld!=.
label variable chld_d "Does person have non-missing child support grant data"
label values chld_d dummy
gen lnchld=ln(chld)

* Number of children living with the respondent, from the w1_a_bhlive_n count.
* This differs from biochildren: it uses a different question and is not restricted by the child's age.
recode w1_a_bhlive_n (-3/0=.), gen(biochild)
* Explicit code 2 on either w1_a_bhbrth or w1_a_bhlive gives a count of zero
replace biochild = 0 if w1_a_bhbrth == 2 | w1_a_bhlive == 2

* Among child support grant recipients: flag who had no count (1) versus a count (0),
* then give those without a count a zero
generate biochild_nodata = (biochild == .) if chld_rec == 1
replace biochild = 0 if biochild == . & chld_rec == 1

* The count is capped at hhchildren when that is known
replace biochild = hhchildren if biochild > hhchildren & hhchildren != . & biochild != .

* Household level biochild variable
egen hh_biochild = total(biochild), by(w1_hhid)

*-----------------------------------------------------------------------------------------------------------------------------------

* FOSTER CARE GRANT

*This one is tricky because I do not have a variable for non-biological children

*received foster care grant
* 1 when w1_a_incfos is 1, 0 for other responders, missing for non-responders
gen fost_rec=1 if w1_a_incfos==1
replace fost_rec=0 if fost_rec!=1 & response==1 /*this is assuming that missings are actually zeros*/

*income from foster care grant
* codes -9 to -3 are blanked
recode w1_a_incfos_v (-9/-3=.), gen(fost)
label variable fost "Monthly income from foster child grant payments"
* A zero amount means no receipt: clear the flag, blank the zero, and only then switch the
* flag on for the remaining amounts. (Previously the flag was set from fost!=. while the zeros
* were still present, which turned it back on for zero reports.)
replace fost_rec=0 if fost==0 
replace fost=. if fost==0
replace fost_rec=1 if fost!=.
gen fost_d=fost!=.
label variable fost_d "Does person have non-missing foster care grant data"
label values fost_d dummy
gen lnfost=ln(fost)

*-----------------------------------------------------------------------------------------------------------------------------------

* CARE DEPENDENCY GRANT

*received care dependency grant
* 1 when w1_a_inccare is 1, 0 for other responders, missing for non-responders
gen care_rec=1 if w1_a_inccare==1
replace care_rec=0 if care_rec!=1 & response==1 /*this is assuming that missings are actually zeros*/

*income from care dependency grant
* codes -9 to -3 are blanked
recode w1_a_inccare_v (-9/-3=.), gen(care)
label variable care "Monthly income from care dependency grant payments"
* A zero amount means no receipt: clear the flag, blank the zero, and only then switch the
* flag on for the remaining amounts. (Previously the flag was set from care!=. while the zeros
* were still present, which turned it back on for zero reports.)
replace care_rec=0 if care==0 
replace care=. if care==0
replace care_rec=1 if care!=.
gen care_d=care!=.
label variable care_d "Does person have non-missing care dependency grant data"
label values care_d dummy

gen lncare=ln(care)

*-----------------------------------------------------------------------------------------------------------------------------------

* INTEREST / DIVIDEND INCOME

*received interest/dividend income
* 1 when w1_a_incint is 1, 0 for other responders, missing for non-responders
gen indi_rec=1 if w1_a_incint==1
replace indi_rec=0 if indi_rec!=1 & response==1 /*this is assuming that missings are actually zeros*/

*income from interest/dividends
* codes -9 to -3 are blanked
recode w1_a_incint_v (-9/-3=.), gen(indi)
label variable indi "Monthly income from interest/dividends"
* A zero amount means no receipt: clear the flag, blank the zero, and only then switch the
* flag on for the remaining amounts. (Previously the flag was set from indi!=. while the zeros
* were still present, which turned it back on for zero reports.)
replace indi_rec=0 if indi==0 
replace indi=. if indi==0
replace indi_rec=1 if indi!=.
gen indi_d=indi!=.
label variable indi_d "Does person have non-missing interest/dividend income data"
label values indi_d dummy
gen lnindi=ln(indi)

*-----------------------------------------------------------------------------------------------------------------------------------

* INHERITANCE

*received inheritance income
* 1 when w1_a_incinh is 1, 0 for other responders, missing for non-responders
gen inhe_rec=1 if w1_a_incinh==1
replace inhe_rec=0 if inhe_rec!=1 & response==1 /*this is assuming that missings are actually zeros*/

*income from inheritance income
* codes -9 to -3 are blanked
recode w1_a_incinh_v (-9/-3=.), gen(inhe)
label variable inhe "Monthly income from inheritances"
* A zero amount means no receipt: clear the flag, blank the zero, and only then switch the
* flag on for the remaining amounts. (Previously the flag was set from inhe!=. while the zeros
* were still present, which turned it back on for zero reports.)
replace inhe_rec=0 if inhe==0 
replace inhe=. if inhe==0
replace inhe_rec=1 if inhe!=.
gen inhe_d=inhe!=.
label variable inhe_d "Does person have non-missing inheritance income data"
label values inhe_d dummy
gen lninhe=ln(inhe)

*-----------------------------------------------------------------------------------------------------------------------------------

* WAR VETERANS PENSION

*there are 4: 1xmissing, 1x140, 1x1600, 1x11000...
*NONE OF THESE IS A CORRECT NUMBER ACCORDING TO SASSA (870+20 before april, 940+20 after)
*Only one of them is of a suitable age to receive a war pension (81), the others are 30's - 50's
*Put in with 'other' income
* War veterans pension receipt: 1 when w1_a_incwar is 1, 0 for other responders, missing for non-responders
generate war_rec = cond(w1_a_incwar == 1, 1, cond(response == 1, 0, .))
* Amount; codes -9 to 0 are blanked
recode w1_a_incwar_v (-9/0=.), gen(war_income)
* Recipients have their "other" income amount and flag taken from the war pension.
* NOTE(review): othe is overwritten, not added to - any existing other-income amount is lost, and
* othe becomes missing when the war pension amount is missing; confirm this is intended
replace othe = war_income if war_rec == 1
replace othe_rec = 1 if war_rec == 1

*-----------------------------------------------------------------------------------------------------------------------------------

* RENTAL INCOME

*received rental income
* 1 when w1_a_incrnt is 1, 0 for other responders, missing for non-responders
gen rnt_rec=1 if w1_a_incrnt==1
replace rnt_rec=0 if rnt_rec!=1 & response==1 /*this is assuming that missings are actually zeros*/

*rental income
* codes -9 to -3 are blanked
recode w1_a_incrnt_v (-9/-3=.), gen(rnt)
label variable rnt "Monthly income from rentals"
* A zero amount means no receipt: clear the flag, blank the zero, and only then switch the
* flag on for the remaining amounts. (Previously the flag was set from rnt!=. while the zeros
* were still present, which turned it back on for zero reports.)
replace rnt_rec=0 if rnt==0 
replace rnt=. if rnt==0
replace rnt_rec=1 if rnt!=.
gen rnt_d=rnt!=.
label variable rnt_d "Does person have non-missing rental income data"
label values rnt_d dummy
gen lnrnt=ln(rnt)


*-----------------------------------------------------------------------------------------------------------------------------------

* RETRENCHMENT / RETIREMENT PACKAGE

* Received a retrenchment or retirement package: 1 when either w1_a_incretr or w1_a_incretp is 1,
* 0 for other responders (missing answers are assumed to mean no), missing for non-responders
generate retr_rec = cond(w1_a_incretr == 1 | w1_a_incretp == 1, 1, cond(response == 1, 0, .))

* Income from the two package questions
* Each amount is zero-filled (codes -9 to 0 and system-missing) so that the two can be added
recode w1_a_incretr_v (-9/0=0) (.=0), gen(temp1)
recode w1_a_incretp_v (-9/0=0) (.=0), gen(temp2)
generate retr = temp1 + temp2
label variable retr "Monthly income from retrenchment payments"
* A sum of zero means neither question had a value
replace retr = . if retr == 0
* Removes every variable whose name starts with temp
drop temp*

replace retr_rec = 1 if retr != .
generate retr_d = (retr != .)
label variable retr_d "Does person have non-missing retrenchment payment data"
label values retr_d dummy
generate lnretr = ln(retr)

*-----------------------------------------------------------------------------------------------------------------------------------

* LOBOLA / BRIDE WEALTH PAYMENTS

*received lobola/bride wealth payment
* 1 when w1_a_inclob is 1, 0 for other responders, missing for non-responders
gen brid_rec=1 if w1_a_inclob==1
replace brid_rec=0 if brid_rec!=1 & response==1 /*this is assuming that missings are actually zeros*/

*income lobola/bride wealth payment
* codes -9 to -3 are blanked
recode w1_a_inclob_v (-9/-3=.), gen(brid)
label variable brid "Monthly income from lobola/bride wealth payments"
* A zero amount means no receipt: clear the flag, blank the zero, and only then switch the
* flag on for the remaining amounts. (Previously the flag was set from brid!=. while the zeros
* were still present, which turned it back on for zero reports.)
replace brid_rec=0 if brid==0 
replace brid=. if brid==0
replace brid_rec=1 if brid!=.
gen brid_d=brid!=.
label variable brid_d "Does person have non-missing lobola/bride wealth payment data"
label values brid_d dummy
gen lnbrid=ln(brid)

*-----------------------------------------------------------------------------------------------------------------------------------

* GIFT INCOME

*received gift income
* 1 when w1_a_incgif is 1, 0 for other responders, missing for non-responders
gen gift_rec=1 if w1_a_incgif==1
replace gift_rec=0 if gift_rec!=1 & response==1 /*this is assuming that missings are actually zeros*/

*income from gifts
* codes -9 to -3 are blanked
recode w1_a_incgif_v (-9/-3=.), gen(gift)
label variable gift "Monthly income from gifts"
* A zero amount means no receipt: clear the flag, blank the zero, and only then switch the
* flag on for the remaining amounts. (Previously the flag was set from gift!=. while the zeros
* were still present, which turned it back on for zero reports.)
replace gift_rec=0 if gift==0 
replace gift=. if gift==0
replace gift_rec=1 if gift!=.
gen gift_d=gift!=.
label variable gift_d "Does person have non-missing gift income data"
label values gift_d dummy
gen lngift=ln(gift)

*-----------------------------------------------------------------------------------------------------------------------------------

* REPAYMENT OF LOANS TO YOU

*received repayment of loans income
* 1 when w1_a_incloan is 1, 0 for other responders, missing for non-responders
gen loan_rec=1 if w1_a_incloan==1
replace loan_rec=0 if loan_rec!=1 & response==1 /*this is assuming that missings are actually zeros*/

*income from repayments of loans to you
* codes -9 to -3 are blanked
recode w1_a_incloan_v (-9/-3=.), gen(loan)
label variable loan "Monthly 'income' from loan repayments"
* A zero amount means no receipt: clear the flag, blank the zero, and only then switch the
* flag on for the remaining amounts. (Previously the flag was set from loan!=. while the zeros
* were still present, which turned it back on for zero reports.)
replace loan_rec=0 if loan==0 
replace loan=. if loan==0
replace loan_rec=1 if loan!=.
gen loan_d=loan!=.
label variable loan_d "Does person have non-missing repayment of loans income data"
label values loan_d dummy
gen lnloan=ln(loan)

*-----------------------------------------------------------------------------------------------------------------------------------

* SALE OF HOUSEHOLD GOODS

*received income from sale of household goods
* 1 when w1_a_incsale is 1, 0 for other responders, missing for non-responders
gen sale_rec=1 if w1_a_incsale==1
replace sale_rec=0 if sale_rec!=1 & response==1 /*this is assuming that missings are actually zeros*/

*income from sale of household goods
* codes -9 to -3 are blanked
recode w1_a_incsale_v (-9/-3=.), gen(sale)
label variable sale "Monthly 'income' from sale of household assets"
* A zero amount means no receipt: clear the flag, blank the zero, and only then switch the
* flag on for the remaining amounts. (Previously the flag was set from sale!=. while the zeros
* were still present, which turned it back on for zero reports.)
replace sale_rec=0 if sale==0 
replace sale=. if sale==0
replace sale_rec=1 if sale!=.
gen sale_d=sale!=.
label variable sale_d "Does person have non-missing sale of household goods income data"
label values sale_d dummy
gen lnsale=ln(sale)

*-----------------------------------------------------------------------------------------------------------------------------------

* REMITTANCES 

* Received remittances: 1 when w1_a_cr is 1, 0 for other responders
* (missing answers are assumed to mean no), missing for non-responders
generate remt_rec = cond(w1_a_cr == 1, 1, cond(response == 1, 0, .))

* Income from remittances
* The eight money amounts go into temp1-temp8 and the eight in-kind values into temp9-temp16;
* codes -9 to 0 are blanked in both
forvalues j = 1/8 {
    recode w1_a_crmnv`j' (-9/0=.), gen(temp`j')
}
forvalues j = 1/8 {
    local k = `j' + 8
    recode w1_a_crkndmnv`j' (-9/0=.), gen(temp`k')
}

* Putting it all together: rowtotal treats missing as zero, so a total of zero means no amount at all
egen remt = rowtotal(temp*)
label variable remt "Monthly income from all remittance payments"
* Removes every variable whose name starts with temp
drop temp*
replace remt = . if remt == 0
replace remt_rec = 1 if remt > 0 & remt != .

generate remt_d = (remt != .)
label variable remt_d "Does person have non-missing remittances received data"
label values remt_d dummy
generate lnremt = ln(remt)

*-----------------------------------------------------------------------------------------------------------------------------------

* HOUSEHOLD VARIABLES FOR HOUSEHOLD LEVEL INCOME IMPUTATIONS

* Household one-shot income
* The presence flags are taken first, so a reported zero still counts as data
generate hhq_incb_d = (hhq_incb != .)
generate hhq_inc_d = (hhq_inc != .)
label variable hhq_inc_d "Does person have non-missing household income data"
label values hhq_inc_d dummy

* Zeros are then blanked and the logs taken
foreach v of varlist hhq_inc hhq_incb {
    replace `v' = . if `v' == 0
    generate ln`v' = ln(`v')
}

* Most common race code within the household; ties go to the lowest code,
* with the highest-code mode used only if that result is missing
egen hhrace = mode(race), by(w1_hhid) minmode
tempvar race_maxmode
egen `race_maxmode' = mode(race), by(w1_hhid) maxmode
replace hhrace = `race_maxmode' if hhrace == .
drop `race_maxmode'

* Flag households with a mode, then give the rest a zero
generate hhrace_d = (hhrace != .)
replace hhrace = 0 if hhrace == .

*Maximum household education
* Highest schooling value and highest post-school code among household members
egen hhedu=max(schooling), by(w1_hhid)
egen hhpostedu=max(postschool), by(w1_hhid)
* Post-school codes converted to an education level; code 24 becomes missing
recode hhpostedu (16/17=12) (18/19=13) (20/21=15) (22=16) (23=17) (24=.)
* Use the post-school level when the household has one. The missing check matters because
* missing satisfies !=0: without it, households whose post-school level is missing
* (for example code 24) had their schooling-based hhedu overwritten with missing.
replace hhedu=hhpostedu if hhpostedu!=0 & hhpostedu!=.
gen hhedusq=hhedu^2

* Median age within the household, computed over (and filled in for) rows with age in [0, 110)
egen hhage = median(age) if age >= 0 & age < 110, by(w1_hhid)
* Flag rows that received a median, then give the rest a zero
generate hhage_d = (hhage != .)
replace hhage = 0 if hhage == .

* 1 when any household member is a trade union member
egen hhtu = max(tradeunion), by(w1_hhid)

* Write out the prepared dataset, replacing any existing copy
save "$DataOUT\prepdata.dta", replace

* end of do file 

*=====================================================================================================================================

